library(tidyverse)
library(kableExtra)
library(sf)
library(ggspatial)
library(raster)
library(stars)
library(spatstat)
library(cowplot)

1 Goal

The goal of this file is to experiment with crime grids.

2 Loading the Map Data Files

proj_dir = "/Volumes/GDRIVE_SSD/homes/alex/datascience/698_CAPSTONE_2021_FALL"

working_dir = paste0(proj_dir, "/", "project_code/Part2_GeoMapping/")

data_dir = paste0(proj_dir, "/project_data/")

setwd(working_dir)
raleigh_corporate_limits_file = paste0(data_dir, "Corporate_Limits.geojson")
#
# Load the geojson files
# change the coordinate system to 2264
# filter objects that belong to RALEIGHT
# --------------------------------------------
gj_raleigh <- sf::st_read(raleigh_corporate_limits_file, quiet = TRUE) %>% 
  st_transform(2264) %>% 
  filter( SHORT_NAME == 'RAL' )

We compute the area of the polygonal representation and validate by checking against Wikipedia’s reported area. They are very close.

# Compute the total square miles of the polygons.
# The NAD83 North Carolina projection uses square ft.
# ------------------------------------------------------
area_of_polygons = st_area(gj_raleigh)

sprintf( "Raleigh Area is: %.2f square miles.  Wikipedia gives: 147.64 sq mi" , sum(area_of_polygons) / ( 5280^2)  )
## [1] "Raleigh Area is: 149.06 square miles.  Wikipedia gives: 147.64 sq mi"

3 Visualization

3.1 Plotting the City and Base Layer

Fill the interior with color and add a border in red.

gj_raleigh %>% ggplot() + geom_sf( fill = "skyblue", color = "darkred", size = 0.1) + 
  labs(title = "Raleigh City Limits in Color")

Let’s put a basemap tile underneath and compass rose and alpha blend.

gj_raleigh %>% ggplot() + geom_sf( fill = "skyblue", color = "darkred") + 
  labs(title = "Raleigh City:  Basemap , Compass, Alpha Blending") +
  annotation_map_tile(type="osm", progress = "none", alpha = 0.5 ) +
  annotation_north_arrow(location="bl", which_north = "true")

## Make Grid Cells and Plot Them

# This gives measurements in feet.
grid_cellsize = 5000

The code below makes a grid of size \(N=\) 5000 units. Experiment suggest the grid units are in feet because the st_area call applied to the grid gives a cell area of \(N^2\) units in squared feet.

The bounding box of the grid cells starts in the lower left (South West) \(A\) where the vertical line \(x=v(A)\) through \(A\) and the horizontal line \(y=h(A)\) through \(A\) are tangent to the city boundary and are left and lower bounds respectively.

The upper right corner (North East) does not have to be tangent to the city limits.
– the vertical line \(x=v(C)\) must have a horizontal distance which is an integer multiple of grid_cellsize from \(x=v(A)\). \[v(C) - v(A) = m \cdot \Delta(Grid) \text{ for some positive integer } m > 0\]

– the horizontal line \(y=h(C)\) must have a vertical distance which is an integer multiple of grid_cellsize from \(y=h(A)\). \[h(C) - h(A) = n \cdot \Delta(Grid) \text{ for some positive integer } n > 0\]

a1 = st_make_valid(gj_raleigh) %>% st_make_grid(cellsize=grid_cellsize )

a1 %>% ggplot() + geom_sf( color = "green") +
  geom_sf(data=gj_raleigh, fill="skyblue", alpha = 0.5 ) +
  annotation_map_tile(type="osm", progress = "none", alpha = 0.5 )  +
  ggtitle("Bounding Box and Grid Layout over Raleigh")

Let’s create row, column coordinates for all cells in the grid.

# make bounding box

grid_from_a1 = a1 %>% st_sf() %>% mutate( id_grid = 1:nrow(.))

raleigh_bbox = st_bbox( a1 )

# Calculate the number of rows and columns

nCols = ceiling(( raleigh_bbox[3] - raleigh_bbox[1] ) / grid_cellsize )

nRows = ceiling(( raleigh_bbox[4] - raleigh_bbox[2] ) / grid_cellsize )

The code below excludes grid cells which are dont intersect Raleigh town limits.

# Define a label scheme on each grid cell.  
#
#
grid_from_a1_rclab =  a1 %>% st_sf() %>% mutate(id_grid = 1:nrow(.))  %>% 
  mutate( cols = rep(1:nCols, nRows ) ,
          rows = unlist( lapply(1:nRows, rep, nCols) ) ) %>%
  group_by(id_grid) %>%
  mutate( lab = paste(rows, "-", cols,  collapse = '') ) %>%
  dplyr::filter(id_grid %in% unlist( st_intersects(gj_raleigh, .)))  # Omit grid cells not in raleigh

Then we plot the included grid cells below with their labels.

grid_from_a1_rclab %>% ggplot() + geom_sf() + 
  geom_text( aes( label = lab, geometry = geometry), 
             stat = "sf_coordinates" , size = 1.3) +  
  geom_sf(data=gj_raleigh, fill="skyblue", alpha = 0.5 ) +
  annotation_map_tile(type="osm", progress = "none", alpha = 0.5 ) 

The code below comes from the clever (file:///Volumes/GDRIVE_SSD/lib/documentation/Bibliography/storage/FVQV6WIP/r-fitting-a-grid-over-a-city-map-and-inputting-data-into-grid-squares.html)

a2 = a1 %>% 
     st_intersection(st_make_valid(gj_raleigh) ) %>% 
     st_cast("MULTIPOLYGON") %>% 
     st_sf() %>% 
     mutate(id = row_number())
a2 %>% ggplot() + geom_sf( color = "orange") +
  geom_sf(data=gj_raleigh, fill="skyblue", alpha = 0.5 ) +
  annotation_map_tile(type="osm", progress = "none", alpha = 0.5 ) 

3.2 Geomapping of Assault Incidents in Raleigh

gj_assault_geocoded = st_read(paste0(data_dir, "EXP_RALEIGH_SUBSET_201801_assaults_Aug2021.geojson" ) )
## Reading layer `EXP_RALEIGH_SUBSET_201801_assaults_Aug2021' from data source `/Volumes/GDRIVE_SSD/homes/alex/datascience/698_CAPSTONE_2021_FALL/project_data/EXP_RALEIGH_SUBSET_201801_assaults_Aug2021.geojson' using driver `GeoJSON'
## Simple feature collection with 439 features and 21 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 2059315 ymin: 718624.2 xmax: 2143035 ymax: 806262.8
## Projected CRS: NAD83 / North Carolina (ftUS)
head(gj_assault_geocoded)

3.3 Plot the assaults on the grid

The chart below shows the city of Raleigh overlaid by a uniform grid of squares, each of size \(N \times N\) where \(N=\) 5000 feet. The cells intersecting the city territory are displayed. Assault incidents for the month of Jan 2018 are displayed as red dots at their locations within the city.

grid_from_a1_rclab %>% ggplot() + geom_sf(size=0.5) + 
  geom_text( aes( label = lab, geometry = geometry), 
             stat = "sf_coordinates" , size = 1.3) +  
  geom_sf(data=gj_assault_geocoded, color = "red", size = 0.3) +
  geom_sf(data=gj_raleigh, fill="skyblue", alpha = 0.5 ) +
  annotation_map_tile(type="osm", progress = "none", alpha = 0.5 ) 

3.4 Joining Grid Cells with Crime Data

We start by using a much smaller subset of 10 crime incidents on Wake Forest Road to test the output.

gj_assault_wake_forest = gj_assault_geocoded  %>% 
  filter( str_detect(reported_block_address, "WAKE FOREST RD" ) ) %>% 
  arrange(reported_block_address)

gj_assault_wake_forest %>% dplyr::select(reported_block_address, OBJECTID, district, reported_date)

The first join of CRIMES to CELLS yields 10 rows. All CRIMES belong to some CELL.

gj_assault_wake_forest %>% st_join( grid_from_a1_rclab) %>% dplyr::select(OBJECTID, reported_block_address, reported_date, lab, id_grid)

The second join of CELLS to CRIMES yields 254 rows.
Most CELLS have no CRIMES associated. The rows represent the 10 incidents which occur in 3 grid cells. All other 244 cells have one row with NA value for the OBJECTID which is a column of the incident dataframe.

cells_to_crimes = grid_from_a1_rclab %>% st_join( gj_assault_wake_forest) %>% 
  dplyr::select(OBJECTID, reported_block_address, reported_date, lab, id_grid)

dim(cells_to_crimes)
## [1] 254   6

We can count the number of crime incidents in each cell with the following commands. Note that we truncate the results to just the first 10 rows for brevity. The same code can work for any grid resolution of Raleigh.

grid_from_a1_rclab %>% st_join(gj_assault_wake_forest) %>% group_by(id_grid, lab) %>% 
  summarize( ct = sum( !is.na(OBJECTID))) %>% 
  st_drop_geometry() %>% arrange(desc(ct)) %>% head(n=10) %>% 
  kable() %>% kable_styling(bootstrap_options = c("hover", "striped"))
## `summarise()` has grouped output by 'id_grid'. You can override using the `.groups` argument.
id_grid lab ct
180 9 - 12 6
202 10 - 13 3
246 12 - 15 1
15 1 - 15 0
16 1 - 16 0
17 1 - 17 0
18 1 - 18 0
19 1 - 19 0
31 2 - 10 0
32 2 - 11 0
cell_results = grid_from_a1_rclab %>% st_join(gj_assault_geocoded) %>% group_by(id_grid, lab) %>% 
  summarize( ct = sum( !is.na(OBJECTID))) %>% 
  st_drop_geometry() %>% arrange(desc(ct)) 
## `summarise()` has grouped output by 'id_grid'. You can override using the `.groups` argument.
cell_results %>%  kable() %>% kable_styling(bootstrap_options = c("hover", "striped"))
id_grid lab ct
116 6 - 11 19
95 5 - 11 14
96 5 - 12 13
113 6 - 8 13
203 10 - 14 13
204 10 - 15 13
225 11 - 15 13
117 6 - 12 9
202 10 - 13 9
141 7 - 15 8
97 5 - 13 7
115 6 - 10 7
159 8 - 12 7
180 9 - 12 7
50 3 - 8 6
52 3 - 10 6
79 4 - 16 6
119 6 - 14 6
120 6 - 15 6
139 7 - 13 6
243 12 - 12 6
53 3 - 11 5
54 3 - 12 5
75 4 - 12 5
76 4 - 13 5
182 9 - 14 5
223 11 - 13 5
224 11 - 14 5
226 11 - 16 5
247 12 - 16 5
59 3 - 17 4
70 4 - 7 4
71 4 - 8 4
92 5 - 8 4
99 5 - 15 4
118 6 - 13 4
161 8 - 14 4
162 8 - 15 4
200 10 - 11 4
217 11 - 7 4
32 2 - 11 3
55 3 - 13 3
90 5 - 6 3
91 5 - 7 3
98 5 - 14 3
112 6 - 7 3
133 7 - 7 3
136 7 - 10 3
137 7 - 11 3
138 7 - 12 3
160 8 - 13 3
205 10 - 16 3
245 12 - 14 3
263 13 - 11 3
310 15 - 16 3
318 16 - 3 3
374 18 - 17 3
38 2 - 17 2
51 3 - 9 2
57 3 - 15 2
72 4 - 9 2
73 4 - 10 2
78 4 - 15 2
80 4 - 17 2
110 6 - 5 2
111 6 - 6 2
114 6 - 9 2
135 7 - 9 2
153 8 - 6 2
155 8 - 8 2
181 9 - 13 2
183 9 - 15 2
196 10 - 7 2
198 10 - 9 2
218 11 - 8 2
220 11 - 10 2
221 11 - 11 2
222 11 - 12 2
238 12 - 7 2
239 12 - 8 2
241 12 - 10 2
242 12 - 11 2
260 13 - 8 2
262 13 - 10 2
264 13 - 12 2
266 13 - 14 2
268 13 - 16 2
279 14 - 6 2
284 14 - 11 2
296 15 - 2 2
297 15 - 3 2
317 16 - 2 2
331 16 - 16 2
373 18 - 16 2
394 19 - 16 2
37 2 - 16 1
77 4 - 14 1
94 5 - 10 1
100 5 - 16 1
121 6 - 16 1
131 7 - 5 1
142 7 - 16 1
143 7 - 17 1
158 8 - 11 1
163 8 - 16 1
178 9 - 10 1
184 9 - 16 1
206 10 - 17 1
219 11 - 9 1
237 12 - 6 1
244 12 - 13 1
246 12 - 15 1
249 12 - 18 1
257 13 - 5 1
267 13 - 15 1
270 13 - 18 1
275 14 - 2 1
278 14 - 5 1
281 14 - 8 1
282 14 - 9 1
283 14 - 10 1
286 14 - 13 1
289 14 - 16 1
291 14 - 18 1
300 15 - 6 1
307 15 - 13 1
308 15 - 14 1
352 17 - 16 1
15 1 - 15 0
16 1 - 16 0
17 1 - 17 0
18 1 - 18 0
19 1 - 19 0
31 2 - 10 0
33 2 - 12 0
34 2 - 13 0
35 2 - 14 0
36 2 - 15 0
39 2 - 18 0
40 2 - 19 0
47 3 - 5 0
48 3 - 6 0
49 3 - 7 0
56 3 - 14 0
58 3 - 16 0
60 3 - 18 0
68 4 - 5 0
69 4 - 6 0
74 4 - 11 0
89 5 - 5 0
93 5 - 9 0
101 5 - 17 0
122 6 - 17 0
132 7 - 6 0
134 7 - 8 0
140 7 - 14 0
144 7 - 18 0
152 8 - 5 0
154 8 - 7 0
156 8 - 9 0
157 8 - 10 0
164 8 - 17 0
165 8 - 18 0
172 9 - 4 0
173 9 - 5 0
174 9 - 6 0
175 9 - 7 0
176 9 - 8 0
177 9 - 9 0
179 9 - 11 0
185 9 - 17 0
186 9 - 18 0
192 10 - 3 0
193 10 - 4 0
194 10 - 5 0
195 10 - 6 0
197 10 - 8 0
199 10 - 10 0
201 10 - 12 0
207 10 - 18 0
208 10 - 19 0
209 10 - 20 0
210 10 - 21 0
212 11 - 2 0
213 11 - 3 0
214 11 - 4 0
215 11 - 5 0
216 11 - 6 0
227 11 - 17 0
228 11 - 18 0
229 11 - 19 0
230 11 - 20 0
231 11 - 21 0
234 12 - 3 0
235 12 - 4 0
236 12 - 5 0
240 12 - 9 0
248 12 - 17 0
250 12 - 19 0
251 12 - 20 0
255 13 - 3 0
256 13 - 4 0
258 13 - 6 0
259 13 - 7 0
261 13 - 9 0
265 13 - 13 0
269 13 - 17 0
271 13 - 19 0
274 14 - 1 0
276 14 - 3 0
277 14 - 4 0
280 14 - 7 0
285 14 - 12 0
287 14 - 14 0
288 14 - 15 0
290 14 - 17 0
292 14 - 19 0
293 14 - 20 0
295 15 - 1 0
298 15 - 4 0
299 15 - 5 0
301 15 - 7 0
302 15 - 8 0
303 15 - 9 0
304 15 - 10 0
305 15 - 11 0
306 15 - 12 0
309 15 - 15 0
311 15 - 17 0
312 15 - 18 0
313 15 - 19 0
316 16 - 1 0
319 16 - 4 0
320 16 - 5 0
321 16 - 6 0
328 16 - 13 0
329 16 - 14 0
330 16 - 15 0
340 17 - 4 0
341 17 - 5 0
350 17 - 14 0
351 17 - 15 0
353 17 - 17 0
372 18 - 15 0
393 19 - 15 0
395 19 - 17 0
415 20 - 16 0

3.5 Bounding Box of a Few Crimes

In this section, we experiment with building a grid pattern around a single crime incident. We want to experiment with two objectives:

  • Building a bounding box and a small grid pattern with a known number of point incidents.

  • Partition the bounding box into a 4 by 4 grid.

  • Compute a kernel density of the points in the bounding box where each grid cell is a pixel.

  • Extract the numerical density of each grid cell and verify it is approximately equal to the number of points in each grid cell divided by area of the cell in reference units.

Successful implementation of this experiment allows us to work with larger scale bounding boxes and kernel density of more complex boundary regions.

buffer_radius = 4000

# define the center of the bounding box as a single crime incident.
(incident1 = gj_assault_wake_forest %>% filter( OBJECTID == 124406) )
# define the window around the point of a given radius in feet.
w_incident1 = st_sfc( st_geometry( incident1 ) ) %>% st_buffer( buffer_radius )

# define a bounding box around the circle centered at incident1
(bb_incident1 = st_as_sfc( st_bbox(w_incident1) ) )
## Geometry set for 1 feature 
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 2108770 ymin: 752846.8 xmax: 2116770 ymax: 760846.8
## Projected CRS: NAD83 / North Carolina (ftUS)
## POLYGON ((2108770 752846.8, 2116770 752846.8, 2...
# Grab the points inside the bounding box centered at incident1
incidents_in_bb = st_intersection(gj_assault_geocoded, bb_incident1)
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
plt1 = incidents_in_bb  %>% ggplot() + geom_sf(alpha = 0.7 , color = "blue") +
       geom_sf( data = bb_incident1, alpha = 0.3, color = "red") +
       annotation_map_tile(type="osm", progress = "none", alpha = 0.4 )  +
       fixed_plot_aspect(ratio = 1) + ggtitle(paste0("Bounding Box ", buffer_radius , " feet and assaults") )

plt2 = gj_assault_geocoded %>% ggplot() + geom_sf( alpha = 0.5 ) + geom_sf( data = bb_incident1, 
                                                        alpha = 0.5 , 
                                                        color = "red" ) + 
  ggtitle("bigger perspective of bbox")


plot_grid(plt1, plt2)

bb_incident1_grid = bb_incident1 %>% st_make_grid(cellsize = buffer_radius / 2  )
# Define a label scheme on each grid cell.  
# Assume we have a 4x4 grid pattern - based on square having 2 x buffer_radius
# and grid cells is 0.5 * buffer_radius
# -----------------------------------------------------------------

nRows = 4
nCols = 4
bb_incident1_grid_rclab =  bb_incident1_grid %>% st_sf() %>% mutate(id_grid = 1:nrow(.))  %>% 
  mutate( cols = rep(LETTERS[1:nCols], nRows ) ,
          rows = unlist( lapply(1:nRows, rep, nCols) ) ) %>%
  group_by(id_grid) %>%
  mutate( lab = paste(rows, "-", cols,  collapse = '') ) 
bb_incident1_grid_rclab  %>% ggplot() + geom_sf(size=0.5) + 
  geom_text( aes( label = lab, geometry = geometry), 
             stat = "sf_coordinates" , size = 1.3) +  
  geom_sf(data=incidents_in_bb, color = "red", size = 0.3) +
  geom_sf(data=gj_raleigh, fill="skyblue", alpha = 0.5 ) +
  annotation_map_tile(type="osm", progress = "none", alpha = 0.5 ) +
  labs( title = "Mini-Grid in city context")

bb_incident1_grid_rclab  %>% ggplot() + geom_sf(size=0.5) + 
  geom_text( aes( label = lab, geometry = geometry), 
             stat = "sf_coordinates" , size = 4) +  
  geom_sf(data=incidents_in_bb, color = "blue", size = 2) +
  annotation_map_tile(type="osm", progress = "none", alpha = 0.5 ) +
  labs( title = "Mini-Grid and its incidents")

Now let’s define the spatstat ppp object so we can compute the kernel density. There are several ingredients needed to build the density:

  • The points data in sf format. incidents_in_bb
  • The window surrounding the points data bb_incident1
  • The grid cell width buffer_radius / 2.
  • Run the density.ppp function with points, window, grid cell width, kernel type.
  • Convert the density raster to a stars object using st_as_stars
# creates a point pattern object in spatstat from sf objects.

( ppp_incidents_in_bb = c( bb_incident1, st_geometry(incidents_in_bb) ) %>% as.ppp() )
## Warning: data contain duplicated points
## Planar point pattern: 11 points
## window: polygonal boundary
## enclosing rectangle: [2108770.5, 2116770.5] x [752846.8, 760846.8] units

Count the number of incidents.

q1 = quadratcount(ppp_incidents_in_bb, nx = 4, ny = 4)

plot(q1, main = "")
plot(incidents_in_bb, add = TRUE , col = "red" )
## Warning in plot.sf(incidents_in_bb, add = TRUE, col = "red"): ignoring all but
## the first attribute

The density kernel function call details:

dimyx is an argument passed to as.mask. dimyx is vector of length 2 where \(dimyx[1] = m\) is the number of pixels in the y direction. \(dimyx[2]=n\) is the number of pixels in the x-direction. This is consistent with a matrix dimensionality convention but contrary to cartesian coordinates.

An alternative is to use eps to define the grid spacing in the units of the reference system.

den_incidents = density.ppp(ppp_incidents_in_bb, sigma = 500, kernel = "gaussian", dimyx = c(4 , 4) )

plot(den_incidents)

3.6 Grabbing Crime Incidents Near Raleigh Border

Some crime incidents in Raleigh are geocoded to locations near but outside the town corporate limits. They show up as being excluded from the count because they fall outside the polygon. This was manually crosschecked using the Wake County ARC GIS system.

A suitable solution is to extend the polygons of each segment of Raleigh by 30 feet to ensure that crime incidents geocoded to the middle of a street bordering the town boundary are included.

gj_raleigh_buffer = st_buffer(gj_raleigh, dist = 100 ) %>% st_union() %>% st_sf() %>% mutate( id_buffer = 1:nrow(.))

gj_raleigh_buffer %>% ggplot() + geom_sf( fill = "skyblue", color = "darkred") + 
  labs(title = "Raleigh City:  Buffered, Alpha Blending") +
  annotation_map_tile(type="osm", progress = "none", alpha = 0.5 ) 

a1buffer = st_make_valid(gj_raleigh_buffer) %>% st_make_grid(cellsize=grid_cellsize )

a1buffer %>% ggplot() + geom_sf( color = "green") +
  geom_sf(data=gj_raleigh_buffer, fill="skyblue", alpha = 0.5 ) +
  annotation_map_tile(type="osm", progress = "none", alpha = 0.5 ) 

# make bounding box

grid_from_a1buffer = a1buffer %>% st_sf() %>% mutate( id_grid = 1:nrow(.))

raleigh_buff_bbox = st_bbox( a1buffer )

# Calculate the number of rows and columns

nColsBuff = ceiling(( raleigh_buff_bbox[3] - raleigh_buff_bbox[1] ) / grid_cellsize )

nRowsBuff = ceiling(( raleigh_buff_bbox[4] - raleigh_buff_bbox[2] ) / grid_cellsize )

Define the grid of cells over a buffered Raleigh.

# Define a label scheme on each grid cell.  
#
#
grid_from_a1buffer_rclab =  a1buffer %>% st_sf() %>% mutate(id_grid = 1:nrow(.))  %>% 
  mutate( cols = rep(1:nColsBuff, nRowsBuff ) ,
          rows = unlist( lapply(1:nRowsBuff, rep, nColsBuff) ) ) %>%
  group_by(id_grid) %>%
  mutate( lab = paste(rows, "-", cols,  collapse = '') ) %>%
  dplyr::filter(id_grid %in% unlist( st_intersects(gj_raleigh_buffer, .)))

Then join the grid of cells to the crime incidents and see which incidents are not mapped to a grid cell.

gj_assault_hist = st_read(paste0(data_dir, "EXP_EDA_RALEIGH_assaults_Aug2021.geojson" ) )
## Reading layer `EXP_EDA_RALEIGH_assaults_Aug2021' from data source `/Volumes/GDRIVE_SSD/homes/alex/datascience/698_CAPSTONE_2021_FALL/project_data/EXP_EDA_RALEIGH_assaults_Aug2021.geojson' using driver `GeoJSON'
## Simple feature collection with 42305 features and 21 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 2004513 ymin: 681334.3 xmax: 2148277 ymax: 808839.6
## Projected CRS: NAD83 / North Carolina (ftUS)
head(gj_assault_hist)
gj_assault_hist %>% st_join(grid_from_a1buffer_rclab) %>% 
  dplyr::select(OBJECTID, reported_block_address, reported_date, id_grid) %>%
  st_drop_geometry() %>%
  filter(is.na(id_grid)) %>% 
  group_by(reported_block_address) %>% 
  summarize( ct = n())